home *** CD-ROM | disk | FTP | other *** search
- %e 1500
- %p 3100
- %a 3000
-
- %{
- /* above directives increase the various table sizes used by Lex */
-
- /*
- *==========================================================================
- * Copyright 1991 Avinash Chopde, All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Avinash Chopde not be used in
- * advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission.
- * Avinash Chopde makes no representations about the suitability of this
- * software for any purpose.
- * It is provided "as is" without express or implied warranty.
- *
- * AVINASH CHOPDE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
- * IN NO EVENT SHALL AVINASH CHOPDE BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THIS SOFTWARE.
- *
- * Author: Avinash Chopde, 1991
- * C2 Colonial Drive #4, Andover, MA 01810, USA.
- *
- */
-
- static char S_RCSID[] = "$Header: e:/itrans/src/rcs/ilex.l 1.8 91/10/15 00:13:49 avinash Exp $"; /* RCS revision stamp for this file */
- /* project headers */
- #include "itrans.h"
- #include "ifm.h"
- /* The *_TOK names used by the rules presumably come from this header
- * (confirm); it is named y_tab.h when MSDOS is defined and y.tab.h
- * otherwise.
- */
- #ifdef MSDOS
- #include "y_tab.h"
- #else
- #include "y.tab.h"
- #endif
- /* yylval is defined elsewhere; it is declared with type YYSTYPE when that
- * macro is defined, and as a plain int otherwise.
- */
- #ifdef YYSTYPE
- extern YYSTYPE yylval;
- #else
- extern int yylval;
- #endif
-
- extern int G_lineno; /* input line counter, incremented by this scanner on newlines */
- extern int in_letter(); /* returns TRUE if the parser (iyacc.y) is in the
- * middle of scanning some letter.
- * Used to decide whether to tell the parser to
- * stop looking for more characters, and
- * terminate the letter.
- * That is done by sending in an ENDPREV_TOK.
- */
- extern char G_endprev_str[]; /* defined in iyacc.y, used to pass string
- * to process
- */
-
- /* Helpers defined after the rules section.  The int return type is
- * written out because implicit int is not valid C from C99 onward.
- */
- static int S_lex_ichar(int t);
- static int S_print(char *t);
- static int S_skipcomments(void);
-
- static int S_lang_tok = -1; /* which language being scanned... */
-
- %}
-
- %START ITEXT DCOMMAND
- %%
-
- "\\%" {
- /* Escaped percent sign (\%).  This rule carries no start condition.
- * If the parser is still in the middle of a letter, the current
- * word is completed first: the two characters are saved in
- * G_endprev_str and ENDPREV_TOK is returned (what the parser then
- * does with G_endprev_str lives in iyacc.y, outside this file).
- * Otherwise the matched text is simply echoed to the output.
- */
- if (in_letter()) {
- S_print("ichar \\ %");
- strcpy(G_endprev_str, "\\%"); /* the literal text \% */
- return (yylval=ENDPREV_TOK);
- } else {
- ECHO;
- }
- }
-
- <ITEXT>"\\" {
- /* A single backslash seen while in the ITEXT state. */
- if (in_letter()) {
- /* The current word has to be completed first, so send
- * an ENDPREV token to the parser.
- */
- S_print("ichar \\");
- /* yyless(0) hands the backslash back to the input (used in place of unput('\\')), so it is scanned again on the next call */
- yyless(0);
- G_endprev_str[0] = '\0'; /* no extra text accompanies this ENDPREV token */
- return (yylval=ENDPREV_TOK);
- } else {
- BEGIN DCOMMAND; /* no letter pending: the <DCOMMAND> rules scan what follows the backslash */
- }
- }
-
- <ITEXT>"%" { S_print("ichar %");
- if (in_letter()) {
- /* The current word has to be completed first; while
- * in_letter() is true, S_skipcomments() below returns the
- * ENDPREV token instead of consuming the comment.
- */
- /* yyless(0) puts the '%' back (used in place of unput('%')) so the comment itself is handled on the next call */
- yyless(0);
- }
- return S_skipcomments();
- /* with no letter pending, everything up to end-of-line is a comment and S_skipcomments() consumes it */
- }
-
- "%" { S_skipcomments(); putchar('\n'); } /* '%' with no start condition, listed after the <ITEXT> rule for the same text: the value returned by S_skipcomments() is discarded and a newline is written here because S_skipcomments() does not write one */
-
- <ITEXT>"a" {S_lex_ichar(A_TOK); return (yylval = A_TOK); } /* every rule in this table: report the match through S_lex_ichar() (debug builds only), then return the token and store it in yylval */
- <ITEXT>"aa" {S_lex_ichar(AA_TOK); return (yylval = AA_TOK);}
- <ITEXT>"A" {S_lex_ichar(AA_TOK); return (yylval = AA_TOK);} /* alternative spelling, same token as "aa" */
- <ITEXT>"i" {S_lex_ichar(I_TOK); return (yylval = I_TOK); }
- <ITEXT>"ii" {S_lex_ichar(II_TOK); return (yylval = II_TOK);}
- <ITEXT>"I" {S_lex_ichar(II_TOK); return (yylval = II_TOK);} /* alternative spelling, same token as "ii" */
- <ITEXT>"u" {S_lex_ichar(U_TOK); return (yylval = U_TOK); }
- <ITEXT>"U" {S_lex_ichar(UU_TOK); return (yylval = UU_TOK);} /* alternative spelling, same token as "uu" */
- <ITEXT>"uu" {S_lex_ichar(UU_TOK); return (yylval = UU_TOK);}
- <ITEXT>"Ri" {
-     /* When the current language is Tamil only the leading 'R' is
-      * kept and returned as RRA_TOK; yyless(1) leaves the 'i' in the
-      * input for the next match.  For any other language the whole
-      * text is one RI_TOK.
-      */
-     if (S_lang_tok == TAMIL_TOK) {
-         yyless(1);
-         return (yylval = RRA_TOK);
-     }
-     return (yylval = RI_TOK);
- }
- <ITEXT>"RI" {
-     /* When the current language is Tamil only the leading 'R' is
-      * kept and returned as RRA_TOK; yyless(1) leaves the 'I' in the
-      * input for the next match.  For any other language the whole
-      * text is one RII_TOK.
-      */
-     if (S_lang_tok == TAMIL_TOK) {
-         yyless(1);
-         return (yylval = RRA_TOK);
-     }
-     return (yylval = RII_TOK);
- }
- <ITEXT>"Li" {
-     /* When the current language is Tamil only the leading 'L' is
-      * kept and returned as LDA_TOK; yyless(1) leaves the 'i' in the
-      * input for the next match.  For any other language the whole
-      * text is one LI_TOK.
-      */
-     if (S_lang_tok == TAMIL_TOK) {
-         yyless(1);
-         return (yylval = LDA_TOK);
-     }
-     return (yylval = LI_TOK);
- }
- <ITEXT>"LI" {
-     /* When the current language is Tamil only the leading 'L' is
-      * kept and returned as LDA_TOK; yyless(1) leaves the 'I' in the
-      * input for the next match.  For any other language the whole
-      * text is one LII_TOK.
-      */
-     if (S_lang_tok == TAMIL_TOK) {
-         yyless(1);
-         return (yylval = LDA_TOK);
-     }
-     return (yylval = LII_TOK);
- }
- <ITEXT>"e" {S_lex_ichar(AY_TOK); return (yylval = AY_TOK);} /* every rule in this table: report the match through S_lex_ichar() (debug builds only), then return the token and store it in yylval */
- <ITEXT>"E" {S_lex_ichar(AAY_TOK); return (yylval = AAY_TOK);}
- <ITEXT>"ai" {S_lex_ichar(AI_TOK); return (yylval = AI_TOK);}
- <ITEXT>"o" {S_lex_ichar(O_TOK); return (yylval = O_TOK); }
- <ITEXT>"O" {S_lex_ichar(OO_TOK); return (yylval = OO_TOK);}
- <ITEXT>"au" {S_lex_ichar(AU_TOK); return (yylval = AU_TOK);}
- <ITEXT>"aM" {S_lex_ichar(AM_TOK); return (yylval = AM_TOK);}
- <ITEXT>"H" {S_lex_ichar(AHA_TOK); return (yylval = AHA_TOK);}
- <ITEXT>"k" {S_lex_ichar(KA_TOK); return (yylval = KA_TOK);}
- <ITEXT>"q" {S_lex_ichar(KADOT_TOK); return (yylval = KADOT_TOK);}
- <ITEXT>"kh" {S_lex_ichar(KHA_TOK); return (yylval = KHA_TOK);}
- <ITEXT>"K" {S_lex_ichar(KHADOT_TOK); return (yylval = KHADOT_TOK);}
- <ITEXT>"gh" {S_lex_ichar(GHA_TOK); return (yylval = GHA_TOK);}
- <ITEXT>"g" {S_lex_ichar(GA_TOK); return (yylval = GA_TOK);}
- <ITEXT>"G" {S_lex_ichar(GADOT_TOK); return (yylval = GADOT_TOK);}
- <ITEXT>"ng" {S_lex_ichar(NGA_TOK); return (yylval = NGA_TOK);}
- <ITEXT>"chh" {S_lex_ichar(CHHA_TOK); return (yylval = CHHA_TOK);}
- <ITEXT>"ch" {S_lex_ichar(CHA_TOK); return (yylval = CHA_TOK);}
- <ITEXT>"j" {S_lex_ichar(JA_TOK); return (yylval = JA_TOK);}
- <ITEXT>"z" {S_lex_ichar(JADOT_TOK); return (yylval = JADOT_TOK);}
- <ITEXT>"jh" {S_lex_ichar(JHA_TOK); return (yylval = JHA_TOK);}
- <ITEXT>"jn" {S_lex_ichar(JNH_TOK); return (yylval = JNH_TOK);}
- <ITEXT>"T" {S_lex_ichar(TTA_TOK); return (yylval = TTA_TOK);}
- <ITEXT>"Th" {S_lex_ichar(TTHA_TOK); return (yylval = TTHA_TOK);}
- <ITEXT>"D" {S_lex_ichar(DDA_TOK); return (yylval = DDA_TOK);}
- <ITEXT>".D" {S_lex_ichar(DDADOT_TOK);return (yylval=DDADOT_TOK);}
- <ITEXT>"Dh" {S_lex_ichar(DDHA_TOK); return (yylval = DDHA_TOK);}
- <ITEXT>".Dh" {S_lex_ichar(DDHADOT_TOK);return (yylval=DDHADOT_TOK);}
- <ITEXT>"N" {S_lex_ichar(NNA_TOK); return (yylval = NNA_TOK);}
- <ITEXT>"t" {S_lex_ichar(TA_TOK); return (yylval = TA_TOK);}
- <ITEXT>"th" {S_lex_ichar(THA_TOK); return (yylval = THA_TOK);}
- <ITEXT>"d" {S_lex_ichar(DA_TOK); return (yylval = DA_TOK);}
- <ITEXT>"dh" {S_lex_ichar(DHA_TOK); return (yylval = DHA_TOK);}
- <ITEXT>"n" {S_lex_ichar(NA_TOK); return (yylval = NA_TOK);}
- <ITEXT>"p" {S_lex_ichar(PA_TOK); return (yylval = PA_TOK);}
- <ITEXT>"ph" {S_lex_ichar(PHA_TOK); return (yylval = PHA_TOK);}
- <ITEXT>"f" {S_lex_ichar(PHADOT_TOK); return (yylval = PHADOT_TOK);}
- <ITEXT>"bh" {S_lex_ichar(BHA_TOK); return (yylval = BHA_TOK);}
- <ITEXT>"b" {S_lex_ichar(BA_TOK); return (yylval = BA_TOK);}
- <ITEXT>"m" {S_lex_ichar(MA_TOK); return (yylval = MA_TOK);}
- <ITEXT>"y" {S_lex_ichar(YA_TOK); return (yylval = YA_TOK);}
- <ITEXT>"r" {S_lex_ichar(RA_TOK); return (yylval = RA_TOK);}
- <ITEXT>"v" {S_lex_ichar(VA_TOK); return (yylval = VA_TOK);}
- <ITEXT>"shh" {S_lex_ichar(SHHA_TOK); return (yylval = SHHA_TOK);}
- <ITEXT>"sh" {S_lex_ichar(SHA_TOK); return (yylval = SHA_TOK);}
- <ITEXT>"s" {S_lex_ichar(SA_TOK); return (yylval = SA_TOK);}
- <ITEXT>"h" {S_lex_ichar(HA_TOK); return (yylval = HA_TOK);}
- <ITEXT>"ld" {S_lex_ichar(LDA_TOK); return (yylval = LDA_TOK);}
- <ITEXT>"L" {S_lex_ichar(LDA_TOK); return (yylval = LDA_TOK);} /* alternative spelling, same token as "ld" */
- <ITEXT>"l" {S_lex_ichar(LA_TOK); return (yylval = LA_TOK);}
- <ITEXT>"ksh" {S_lex_ichar(KSHA_TOK); return (yylval = KSHA_TOK);}
- <ITEXT>"x" {S_lex_ichar(KSHA_TOK); return (yylval = KSHA_TOK);} /* alternative spelling, same token as "ksh" */
- <ITEXT>"gy" {S_lex_ichar(GYA_TOK); return (yylval = GYA_TOK);}
-
- <ITEXT>"ny" {
-     /* Only Tamil treats "ny" as a unit (NYA_TOK).  For any other
-      * language the 'n' alone is returned as NA_TOK and yyless(1)
-      * leaves the 'y' in the input for the next match.
-      */
-     if (S_lang_tok == TAMIL_TOK) {
-         return (yylval = NYA_TOK);
-     }
-     yyless(1);
-     return (yylval = NA_TOK);
- }
- <ITEXT>"n^" { return (yylval = NNX_TOK);} /* Tamil Only */
-
- <ITEXT>"R" {S_lex_ichar(RRA_TOK); return (yylval = RRA_TOK);}
-
- <ITEXT>"AUM" {S_lex_ichar(AUM_TOK); return (yylval = AUM_TOK);}
- <ITEXT>"SRI" {S_lex_ichar(SRI_TOK); return (yylval = SRI_TOK);}
- <ITEXT>".r" {S_lex_ichar(RA_HALF_TOK);return (yylval=RA_HALF_TOK);} /* the rules spelled with a leading '.' below each return their own token */
- <ITEXT>".n" {S_lex_ichar(ANUSVARA_TOK);return(yylval=ANUSVARA_TOK);}
- <ITEXT>".h" {S_lex_ichar(VIRAAM_TOK);return(yylval=VIRAAM_TOK);}
- <ITEXT>".c" {S_lex_ichar(CHANDRA_TOK);return(yylval= CHANDRA_TOK);}
- <ITEXT>".C" {S_lex_ichar(CHANDRA_BN_TOK);return(yylval=CHANDRA_BN_TOK);}
- <ITEXT>".a" {S_lex_ichar(AVAGRAHA_TOK);return(yylval=AVAGRAHA_TOK);}
-
- <ITEXT>[ \t] {S_print("ichar \t");return (yylval = BLANK_TOK);} /* a single space or tab inside ITEXT */
- <ITEXT>[\n] {S_print("ichar \n");G_lineno++;return(yylval=NEWLINE_TOK);} /* newline inside ITEXT: count the line, then tell the parser */
-
- <ITEXT>"{}" {
-     /* An empty brace pair.  With no letter in progress the text is
-      * passed along in G_endprev_str with ENDPREV_TOK; inside a
-      * letter it is reported as NOLIG_TOK.
-      */
-     S_print("ichar {}");
-     if (!in_letter()) {
-         strcpy(G_endprev_str, yytext);
-         return (yylval = ENDPREV_TOK);
-     }
-     return (yylval = NOLIG_TOK);
- }
-
- <ITEXT>. { S_print("ichar ."); /* any other single character seen in ITEXT (not newline) */
- strcpy(G_endprev_str, yytext); /* pass the character itself along with the token */
- return (yylval=ENDPREV_TOK);
- }
-
- <DCOMMAND>[a-zA-Z][\.\-a-zA-Z0-9]* { /* a name after the backslash: a letter followed by letters, digits, '.' or '-' */
- S_print("dcommand a-z");
- BEGIN ITEXT; /* back to scanning ITEXT input */
- return (COMMAND_TOK); /* yylval is not set here; NOTE(review): the parser presumably reads the name from yytext - confirm */
- }
-
- <DCOMMAND>. { /* backslash followed by any other single character (not newline) */
- S_print("dcommand other");
- process_otherchar('\\'); /* process_otherchar() is not declared in this file; the backslash consumed earlier is passed first */
- process_otherchar(yytext[0]); /* then the character that followed it */
- BEGIN ITEXT; /* back to scanning ITEXT input; no token is returned */
- }
-
- \\indian[ \t]* {BEGIN ITEXT; return (S_lang_tok = ILANG_TOK);} /* each \<language> rule (with any trailing blanks or tabs) enters ITEXT, remembers the language token in S_lang_tok and returns it */
- \\endindian[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);} /* each \end<language> rule returns to the initial state and stores and returns ENDLANG_TOK */
- \\marathi[ \t]* {BEGIN ITEXT; return (S_lang_tok = MARATHI_TOK);}
- \\endmarathi[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\hindi[ \t]* {BEGIN ITEXT; return (S_lang_tok = HINDI_TOK);}
- \\endhindi[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\tamil[ \t]* {BEGIN ITEXT; return (S_lang_tok = TAMIL_TOK);}
- \\endtamil[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\sanskrit[ \t]* {BEGIN ITEXT; return (S_lang_tok = SANSKRIT_TOK);}
- \\endsanskrit[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\bengali[ \t]* {BEGIN ITEXT; return (S_lang_tok = BENGALI_TOK);}
- \\endbengali[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\telugu[ \t]* {BEGIN ITEXT; return (S_lang_tok = TELUGU_TOK);}
- \\endtelugu[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\malayalam[ \t]* {BEGIN ITEXT; return (S_lang_tok = MALAYALAM_TOK);}
- \\endmalayalam[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\gujarati[ \t]* {BEGIN ITEXT; return (S_lang_tok = GUJARATI_TOK);}
- \\endgujarati[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\oriya[ \t]* {BEGIN ITEXT; return (S_lang_tok = ORIYA_TOK);}
- \\endoriya[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
- \\kannada[ \t]* {BEGIN ITEXT; return (S_lang_tok = KANNADA_TOK);}
- \\endkannada[ \t]* {BEGIN 0; return (S_lang_tok = ENDLANG_TOK);}
-
- \\indianifm=[a-zA-Z0-9_\-\.]* {yylval = ILANG_TOK; return SETIFM_TOK;} /* \<language>ifm=<name>: every rule returns SETIFM_TOK with the language token in yylval; the matched text, name included, stays in yytext */
- \\marathiifm=[a-zA-Z0-9_\-\.]* {yylval = MARATHI_TOK; return SETIFM_TOK;}
- \\hindiifm=[a-zA-Z0-9_\-\.]* {yylval = HINDI_TOK; return SETIFM_TOK;}
- \\tamilifm=[a-zA-Z0-9_\-\.]* {yylval = TAMIL_TOK; return SETIFM_TOK;}
- \\sanskritifm=[a-zA-Z0-9_\-\.]* {yylval = SANSKRIT_TOK; return SETIFM_TOK;}
- \\bengaliifm=[a-zA-Z0-9_\-\.]* {yylval = BENGALI_TOK; return SETIFM_TOK;}
- \\teluguifm=[a-zA-Z0-9_\-\.]* {yylval = TELUGU_TOK; return SETIFM_TOK;}
- \\malayalamifm=[a-zA-Z0-9_\-\.]* {yylval = MALAYALAM_TOK; return SETIFM_TOK;}
- \\gujaratiifm=[a-zA-Z0-9_\-\.]* {yylval = GUJARATI_TOK; return SETIFM_TOK;}
- \\oriyaifm=[a-zA-Z0-9_\-\.]* {yylval = ORIYA_TOK; return SETIFM_TOK;}
- \\kannadaifm=[a-zA-Z0-9_\-\.]* {yylval = KANNADA_TOK; return SETIFM_TOK;}
-
- \\indianfont=[a-zA-Z0-9_\-\.\\]* {yylval = ILANG_TOK; return SETFONT_TOK;} /* \<language>font=<name>: every rule returns SETFONT_TOK with the language token in yylval; unlike the ifm rules, the name may also contain backslashes */
- \\hindifont=[a-zA-Z0-9_\-\.\\]* {yylval = HINDI_TOK; return SETFONT_TOK;}
- \\tamilfont=[a-zA-Z0-9_\-\.\\]* {yylval = TAMIL_TOK; return SETFONT_TOK;}
- \\marathifont=[a-zA-Z0-9_\-\.\\]* {yylval = MARATHI_TOK; return SETFONT_TOK;}
- \\sanskritfont=[a-zA-Z0-9_\-\.\\]* {yylval = SANSKRIT_TOK; return SETFONT_TOK;}
- \\bengalifont=[a-zA-Z0-9_\-\.\\]* {yylval = BENGALI_TOK; return SETFONT_TOK;}
- \\telugufont=[a-zA-Z0-9_\-\.\\]* {yylval = TELUGU_TOK; return SETFONT_TOK;}
- \\malayalamfont=[a-zA-Z0-9_\-\.\\]* {yylval = MALAYALAM_TOK;return SETFONT_TOK;}
- \\gujaratifont=[a-zA-Z0-9_\-\.\\]* {yylval = GUJARATI_TOK; return SETFONT_TOK;}
- \\oriyafont=[a-zA-Z0-9_\-\.\\]* {yylval = ORIYA_TOK; return SETFONT_TOK;}
- \\kannadafont=[a-zA-Z0-9_\-\.\\]* {yylval = KANNADA_TOK; return SETFONT_TOK;}
-
- \n {S_print("got newline\n");G_lineno++; ECHO; } /* newline not claimed by an earlier rule: count the line and copy it to the output; no token is returned */
- . {S_print("everything else! got .");ECHO; } /* any other single character not claimed by an earlier rule is copied to the output unchanged */
-
- %%
-
- /*
-  * S_lex_ichar: debug trace for a token matched by one of the rules.
-  *
-  * t is the token code about to be returned.  When DEBUG is defined the
-  * match is reported on stderr: codes above 255 are printed together
-  * with the matched text and _I_(t), smaller codes are printed as a
-  * character.  Without DEBUG nothing is printed.
-  *
-  * Always returns TRUE.  The return type is written out because
-  * implicit int is not valid C from C99 onward.
-  */
- static int S_lex_ichar(int t)
- {
- #ifdef DEBUG
-     if (t > 255) {
-         fprintf(stderr, "LEXER: have seen <%s> tok %d (I %d)\n",
-                 yytext, t, _I_(t));
-     } else {
-         fprintf(stderr, "LEXER: seen char %c (dec %d)\n", t, t);
-     }
- #else
-     (void) t; /* only used by the debug trace */
- #endif /*DEBUG*/
-     return TRUE;
- }
- /*
-  * S_print: debug trace naming the rule that just matched.
-  *
-  * t is a short label for the rule.  When DEBUG is defined the matched
-  * text (yytext) and the label are written to stderr; without DEBUG
-  * nothing is printed.
-  *
-  * Always returns TRUE.  The return type is written out because
-  * implicit int is not valid C from C99 onward.
-  */
- static int S_print(char* t)
- {
- #ifdef DEBUG
-     fprintf(stderr, "LEXERprint: have seen <%s> rule is %s\n",
-             yytext, t);
- #else
-     (void) t; /* only used by the debug trace */
- #endif /*DEBUG*/
-     return TRUE;
- }
-
- /*
-  * S_skipcomments: handle a '%' comment that runs to end-of-line.
-  *
-  * If the parser is in the middle of a letter, nothing is consumed:
-  * G_endprev_str is emptied and ENDPREV_TOK is returned so the letter
-  * can be completed first (the callers arrange for the '%' to be seen
-  * again).  Otherwise '%' and the rest of the line are copied to stdout
-  * without the terminating newline, and NEWLINE_TOK is returned.
-  *
-  * The token returned is also stored in yylval.
-  */
- static int S_skipcomments(void)
- {
-     int c;
-
-     if (in_letter()) {
-         G_endprev_str[0] = '\0';
-         return (yylval = ENDPREV_TOK);
-     }
-
-     /* not inside a letter: eat up the comment now */
-     putchar('%');
-     /* input() reports end of input as 0 in lex (and as EOF in some
-      * scanner generators); stop on either so that a comment on an
-      * unterminated last line cannot loop forever.
-      */
-     while ((c = input()) != EOF && c != 0 && c != '\n') {
-         putchar(c);
-     }
-
-     /* count the line only if its newline was actually consumed */
-     if (c == '\n') {
-         G_lineno++;
-     }
-     return (yylval = NEWLINE_TOK);
- }
-
- #ifdef NOLEX
- /* if the system is missing lex, add these functions */
- #ifdef yywrap
- #undef yywrap
- #endif
- /* yywrap: always returns 1, i.e. there is no further input after end of file. */
- int yywrap() { return 1; }
-
- /*
-  * yyless: keep the first n characters of the current match and give the
-  * rest back to the input, so they are scanned again.
-  *
-  * As with the standard yyless, yytext is cut to n characters and yyleng
-  * is set to n.  A value of n outside 0..yyleng is ignored.
-  * Always returns 0.  NOT TESTED!
-  */
- int yyless(int n)
- {
-     int i;
-     extern int yyleng;
-
-     if (n < 0 || n > yyleng) {
-         return 0; /* nothing sensible to push back */
-     }
-     /* push back from the end so the characters are re-read in order */
-     for (i = yyleng - 1; i >= n; i--) {
-         unput(yytext[i]);
-     }
-     yytext[n] = '\0';
-     yyleng = n;
-     return 0;
- }
- #endif
-
-